######################################################
######################################################
#### Figure 1: SFA versus Wordfish
######################################################
######################################################
rm(list=ls())
library(quanteda)

load('obj_words_112')
load("sfavswordfishrun")
load('dtm_112')
load('matrix112')

##Fit with wordfish
dtm.words<-dtm3[rownames(dtm3)=="",]

rownames(dtm.words)<-1:nrow(dtm.words)
colnames(dtm.words)<-1:ncol(dtm.words)

set.seed(1)
wf1<-textmodel(as.dfm(dtm.words),model="wordfish")
wf.preds<-0*dtm.words

for(i in 1:nrow(dtm.words)) for(j in 1:ncol(dtm.words)) wf.preds[i,j]<-
	attr(wf1,"alpha")[i]+attr(wf1,"psi")[j]+attr(wf1,"theta")[i]*attr(wf1,"beta")[j]

wf.preds2<-exp(wf.preds)

##Format SFA obj
obj.curr$tau<-sparse1$tau
sparse1<-obj.curr
dtm.words<-t(dtm2)

##Generate figure

tocount<-function(x,taus){
	taus<-c(taus,2*max(taus))
	lower<-apply(x,c(1,2),FUN=function(x) max(which(taus<=x)))-1
	upper<-apply(x,c(1,2),FUN=function(x) min(which(taus>=x)))-1
	(upper+lower)/2
	#lower+(x-lower)/(upper-lower)
	
}

r1<-rowMeans(sparse1$Udim1)
c1<-rowMeans(sparse1$Vdim1)
d1<-mean(sparse1$D.mean[1,])
z1<-r1%*%t(c1)*d1
mean.mat<-0*dtm.words
r2<-rowMeans(sparse1$Z)
c2<-colMeans(sparse1$Z)
for(i in 1:nrow(sparse1$Z)) for(j in 1:ncol(sparse1$Z)) mean.mat[i,j]<-r2[i]+c2[j]
mean.mat<-mean.mat-mean(mean.mat)+mean(sparse1$Z)

	out1<-tocount(z1+mean.mat,sparse1$tau)
	dev<-function(x,words.mat=dtm.words) -2*(sum(dpois(words.mat,x,log=T)))

	dev(out1)/-2# -1709286
	dev(wf.preds2)/-2#-2173875

	mean(out1[dtm.words==0]<1)#0.6098743
	sum(out1[dtm.words==0]<1)#23532
	mean(wf.preds2[dtm.words==0]<1)#0.07863159
	sum(wf.preds2[dtm.words==0]<1)#3037
	
	
	
pdf("Fig1_SFA.pdf",h=6,w=8)
par(mar=c(3,4,3,.1))
boxplot(wf.preds2[dtm.words==0],out1[dtm.words==0],names=c("Poisson Link","SFA"))
mtext(side=2,"Fitted Value",line=3)
mtext(side=3,"Fitted Values for Observed Zero Counts, By Method",line=1,font=2,cex=1.25)

dev.off()

######################################################
######################################################
#### Section 3.2: Analyze vote data
######################################################
######################################################

rm(list=ls())
for(i in 105:112){
load(paste("obj_votes_",i,sep=""))
tab1<-table(colSums(obj.curr$D.sparse!=0))/ncol(obj.curr$D.sparse)
print(i)
print(tab1)
}

for(i in 105:112){
load(paste("obj_votes_",i,sep=""))
dim1<-obj.curr$Vdim1
c1<-apply(obj.curr$Vdim1,2,FUN=function(x) x*sign(cor(x,obj.curr$Vdim1[,1])))
est1<-rowMeans(c1)
dwnom<-read.csv(paste("dwnom",i,".csv",sep=""))
est2<-dwnom[,"V8"]
est2[abs(est2)>100]<-NA
print(i)
print(abs(cor(est1,est2,use="pairwise")))
}

######################################################
######################################################
#### Figures 2, 5: Tables of dimensionality
######################################################
######################################################


rm(list=ls())
#files.load<-list.files("../output")

type.obj<-c("both","words","votes")
table.all<-NULL

make.table<-function(obj.curr){
cm1<-colSums(obj.curr$D.sp!=0)
sapply(1:20,FUN=function(x) mean(x==cm1))
}

for(i in 1:length(type.obj)){
	type<-type.obj[i]
	tab.curr<-NULL
for(sen.num in 105:112){
	file.curr<-paste("obj",type,sen.num,sep="_")
	load(file.curr)
	tab.curr<-cbind(tab.curr,make.table(obj.curr))
	}
	colnames(tab.curr)<-paste("Senate",105:112)
	tab.curr<-cbind(rowMeans(tab.curr),tab.curr)
	colnames(tab.curr)[1]<-"Average"
	table.all[[i]]<-tab.curr	
}
names(table.all)<-type.obj

plot.table<-function(x){

xlim.use<-range(which(rowMeans(x)>0.00))
ylim.use<-c(0,max(x))

par(mfrow=c(3,3),mar=c(2,4,2,.1))

for(i in 1:9){

x.curr<-x[,i]
x.plot<-1:length(x.curr)
plot(0,0,type="n",xlim=xlim.use,ylim=ylim.use,xlab="",ylab="")
points(x.plot,x.curr,pch=19,cex=1.5)
segments(x0=x.plot,x1=x.plot,y0=0*x.plot,y1=x.curr,cex=1.3)
mtext(side=2,"Mass",line=2,font=2,cex=.6)
mtext(side=3,colnames(x)[i],cex=1,font=2)
}

}

pdf("Fig2_SFA.pdf",h=8,w=9);plot.table(table.all[[1]]);dev.off()
pdf("Fig5_SFA.pdf",h=8,w=9);plot.table(table.all[[2]]);dev.off()

######################################################
######################################################
#### Figures 3, 4: 112th Senate cut lines and Word densities
######################################################
######################################################

rm(list=ls())
load("obj_both_112")
load("dtm_112")
load("votemat_112")
scale.text<-obj.curr
r1<-cor(scale.text$Udim1,scale.text$Udim1[,1])
r2<-cor(scale.text$Udim2,scale.text$Udim2[,1])
orient1<-as.vector(ifelse(r1>0,-1,1))
orient2<-as.vector(ifelse(r2>0,-1,1))


dwnom<-read.csv("dwnom112.csv")
to01<-function(x) (x-min(x))/(max(x)-min(x))

nom.rb<-to01(dwnom$V8)

names.sen<-toupper(dwnom$V3)

dim1<-colMeans(t(scale.text$Vdim1)*(orient1))
dim2<-colMeans(t(scale.text$Vdim2)*(orient2))
adj1<-sign(dim1[names.sen=="MCCONNELL"])
adj2<-sign(dim2[names.sen=="CRAPO"])


words1<-colMeans(t(scale.text$Udim1)*(orient1))[-c(1:ncol(votes.mat))]
words2<-colMeans(t(scale.text$Udim2)*(orient1))[-c(1:ncol(votes.mat))]
names(words1)<-names(words2)<-colnames(dtm2)

#Reorient
dim1<- adj1*dim1/sd(dim1)
words1<- adj1*words1/sd(words1)
dim2<- adj2*dim2/sd(dim2)
words2<- adj2*words2/sd(words2)

names(dim2)<-names(dim1)<-names.sen

leaders<-names.sen%in%c("MCCONNELL","SCHUMER","DURBIN","REID","ALEXANDER","KYL","THUNE")


##Figure
pdf("Fig4_SFA.pdf",h=6,w=13)
par(mfrow=c(1,2),mar=c(3,3,1.3,.2))
keeps<-!leaders#(dim2 > -2)
drops<-leaders
plot(dim1[keeps],dim2[keeps],col=rgb(nom.rb[keeps],0,1-nom.rb[keeps]),pch=19,
ylim=range(dim2),xlab="",ylab="")
text(names.sen[drops],x=dim1[drops],y=dim2[drops])
mtext(side=1,line=2,"First Dimension",font=2)
mtext(side=2,line=2,"Second Dimension",font=2)
mtext(side=3,"Policy and Leadership Dimensions",font=2)




centrists<-names.sen%in%c("COCHRAN","CRAPO","LUGAR","SHELBY","HELLER","BOOZMAN","KIRK","ENSIGN")

keeps<-!centrists#(dim2 > -2)
drops<-centrists
plot(dim1[keeps],dim2[keeps],col=rgb(nom.rb[keeps],0,1-nom.rb[keeps]),pch=19,
ylim=range(dim2),xlab="",ylab="")
text(names.sen[drops],x=dim1[drops],y=dim2[drops])
mtext(side=1,line=2,"First Dimension",font=2)
mtext(side=2,line=2,"Second Dimension",font=2)
mtext(side=3,"Identifying Moderates with Cutting Lines",font=2)
#abline(b= words1["boehner"]/words2["boehner"],a=-.5,lwd=2)
#abline(b= words1["fiscal cliff"]/words2["fiscal cliff"],a=-2.5,lwd=2)
#abline(b= words1["student loan"]/words2["student loan"],a=1,lwd=2)

b1<-words1["boehner"]/words2["boehner"]
b2<-words1["fiscal cliff"]/words2["fiscal cliff"]
b3<-words1["student loan"]/words2["student loan"]
abline(b=b1 ,a=-.5,lwd=2)
abline(b= b2,a=-.5*b2,lwd=2)
abline(b=b3 ,a=-.5*b3,lwd=2)

text("'Boehner'",x=.65,y=-1,pos=1,font=2)
arrows(x0=.65,y0=-1,x1=.57,y1=-.32,length=.1)


text("'fiscal cliff'",x=.4,y=-2.5,pos=1,font=2)
arrows(x0=.4,y0=-2.55,x1=.2, y1=-1.9 ,length=.1)

text("'student loan'",x=1.25,y=-2.7,pos=1,font=2)
arrows(x0=1.25,y0=-2.7,x1=1.65,y1=-2.3,length=.1)
dev.off()

##Correlation bw nominate and our locations
cor(dim1,nom.rb)

##Load in 108th Senate

load('obj_both_108')
load("dtm_108")
load('votemat_108')
scale.text<-obj.curr
r1<-cor(scale.text$Udim1,scale.text$Udim1[,1])
r2<-cor(scale.text$Udim2,scale.text$Udim2[,1])
orient1<-as.vector(ifelse(r1>0,-1,1))
orient2<-as.vector(ifelse(r2>0,-1,1))


words1.108<-colMeans(t(scale.text$Udim1)*(orient1))[-c(1:ncol(votes.mat))]
words2.108<-colMeans(t(scale.text$Udim2)*(orient1))[-c(1:ncol(votes.mat))]
names(words1.108)<-names(words2.108)<-colnames(dtm2)


## Orient so R's on right
nom108<-read.csv("dwnom108.csv")
nom112<-read.csv("dwnom112.csv")
words1.108<-words1.108*sign(rowMeans(scale.text$Vdim1)[grep("sessions",nom108[,"V3"])])
words1<-words1*sign(dim1[grep("sessions",nom112[,"V3"])])


plot.words<-function(x,y,words,dif=.25){
	nw<-length(words)
	text(rep(x,nw),y+dif*(1:nw),words )
}

pdf("Fig3_SFA.pdf",h=6,w=12.5)
par(mfrow=c(1,2),mar=c(3,3,2,.2))
##Plot 108th Senate
d1<-density(words1.108/sd(words1.108)*0.0655,bw=0.025)
plot(d1$x,log(d1$y),type="l",ylim=c(min(log(d1$y)),5),xlab="",ylab="",xlim=c(-.5,max(d1$x)+.07))
mtext(side=1,"Scale",font=2,line=2)
mtext(side=2,"Log Density",font=2,line=2)
mtext(side=3,"108th Senate",font=2,line=.2,cex=1.1)

points(x=words1.108/sd(words1.108)*0.0655,y=-9+0*words1.108,pch="|")
cluster1<-names(sort(words1.108,dec=T))[1:5]
cluster2<-names(sort(abs(words1.108-.2)))[1:6]
cluster3<-names(sort(words1.108[abs(words1.108)<0.0004],dec=T))[1:5]
cluster4<-names(sort(abs(words1.108+.217),dec=F))[1:5]
cluster5<-names(sort(words1.108))[1:5]

plot.words(-.41, -4.7,cluster1,.4)
plot.words(-.25, -2.2,cluster2,.4)
plot.words(0, 2,cluster3,.4)
plot.words(.28, -1.8,cluster4,.4)
plot.words(.43, -5,cluster5,.4)

#arrows(x0=.2,y0=0,x1=.22,y1=-1.3,len=.1)

##Plot 112th Senate
d1<-density(words1/sd(words1)*0.0655,bw=0.02)
plot(d1$x,log(d1$y),type="l",xlim=c(-.475,max(d1$x)+.07),ylim=c(min(log(d1$y)),4),xlab="",ylab="")
mtext(side=1,"Scale",font=2,line=2)
mtext(side=2,"Log Density",font=2,line=2)
mtext(side=3,"112th Senate",font=2,line=.2,cex=1.1)

points(x=words1/sd(words1)*0.0655,y=-8.6+0*words1,pch="|")
sort(words1)[1:5]
cluster1<-names(sort(words1))[1:5]
cluster2<-names(sort(words1))[6:10]
cluster3<-names(sort(words1,dec=T))[1:5]
cluster4<-names(sort(words1[abs(words1)<sort(abs(words1))[4]],dec=T))

plot.words(-.385, -4.1,cluster1,.35)
plot.words(-.2, -4.2,cluster2,-.35)
plot.words(.35, -4.1,cluster3,.35)
plot.words(0, 1.9,cluster4,.35)

arrows(x0=-.2,y0=-4.2,x1=-.26,y1=-2.9,len=.1)
dev.off()

######################################################
######################################################
#### Figures 7: Imputing off leaders
######################################################
######################################################
rm(list=ls())

load('ImputeExercisesFit')
impute.obj<-scale.text.lead

load("obj_both_112")
obj<-obj.curr

dwnom<-read.csv("dwnom112.csv")
nomscore<-dwnom$V8
nomscore<-(nomscore-min(nomscore))/(max(nomscore)-min(nomscore))
party<-dwnom$V6
names.sen<-toupper(dwnom$V3)

##Put on same scale
scale.sd<-function(x) x/(sd(rowMeans(x)))
obj$Vdim1<-scale.sd(obj$Vdim1)
obj$Vdim2<-scale.sd(obj$Vdim2)
impute.obj$Vdim1<-scale.sd(impute.obj$Vdim1)
impute.obj$Vdim2<-scale.sd(impute.obj$Vdim2)



which.lead.drop<-sapply(c("REID","MCCONN","KYL","SCHUMER"),FUN=function(x)grep(x,toupper(dwnom$V3)))
adj1<-sign(rowMeans(obj$Vdim1))[names.sen=="MCCONNELL"]
adj2<-sign(rowMeans(obj$Vdim2))[names.sen=="MCCONNELL"]
adj3<-sign(rowMeans(impute.obj$Vdim1))[names.sen=="MCCONNELL"]
adj4<-sign(rowMeans(impute.obj$Vdim2))[names.sen=="MCCONNELL"]

pdf("Fig7_SFA.pdf",h=6,w=13)
par(mfrow=c(1,2),mar=c(3,3,1.3,.2))
lims.use<-range(c(adj1*rowMeans(obj$Vdim1),adj4*rowMeans(impute.obj$Vdim2)))
plot(adj1*rowMeans(obj$Vdim1),adj4*rowMeans(impute.obj$Vdim2),xlab="",ylab="",type="n",xlim=lims.use,ylim=lims.use)
abline(c(0,1),col=gray(.5))
points(adj1*rowMeans(obj$Vdim1),adj4*rowMeans(impute.obj$Vdim2),col=rgb(nomscore,0,1-nomscore),cex=1.3)
points(adj1*rowMeans(obj$Vdim1)[which.lead.drop],adj4*rowMeans(impute.obj$Vdim2)[which.lead.drop],col=rgb(nomscore,0,1-nomscore),pch=19,cex=1.3)
legend("topleft",col=c("red","blue"),legend=c("Republican Leaders","Democratic Leaders","Republican Rank and File", "Democratic Rank and File"),pch=c(19,19,1,1),bty="n")
mtext(side=1,"First Dimension (All Data)",line=2)
mtext(side=2,"Second Dimension (Imputed)",line=2)
mtext(side=3,"Ideology Dimension",line=.2,font=2)

lims.use<-range(c(adj2*rowMeans(obj$Vdim2),adj3*rowMeans(impute.obj$Vdim1)))

plot(adj2*rowMeans(obj$Vdim2),adj3*rowMeans(impute.obj$Vdim1),xlab="",ylab="",type="n",xlim=lims.use,ylim=lims.use)
abline(c(0,1),col=gray(.5))

points(adj2*rowMeans(obj$Vdim2),adj3*rowMeans(impute.obj$Vdim1),col=rgb(nomscore,0,1-nomscore),cex=1.3)
points(adj2*rowMeans(obj$Vdim2)[which.lead.drop],adj3*rowMeans(impute.obj$Vdim1)[which.lead.drop],col=rgb(nomscore,0,1-nomscore),pch=19,cex=1.3)

mtext(side=1,"First Dimension (All Data)",line=2)
mtext(side=2,"Second Dimension (Imputed)",line=2)
mtext(side=3,"Leadership Dimension",line=.2,font=2)


legend("topleft",col=c("red","blue"),legend=c("Republican Leaders","Democratic Leaders","Republican Rank and File", "Democratic Rank and File"),pch=c(19,19,1,1),bty="n")
dev.off()

##Correlations in text
cor(nomscore,rowMeans(impute.obj$Vdim2))
#[1] 0.7997775
cor(nomscore[party!=200],rowMeans(impute.obj$Vdim2)[party!=200])
#[1] 0.5306071
cor(nomscore[party==200],rowMeans(impute.obj$Vdim2)[party==200])
#[1] 0.4035092

######################################################
######################################################
#### Figures 6: Word dimensions by Senate
######################################################
######################################################

rm(list=ls())
load("obj_words_112")
load("dtm_112")

#load("../fig/wordsDims.RData")

list.out<-obj.curr
list.out$U<-list.out$U.all
rownames(list.out$U)<-colnames(dtm2)

## This code replicates the Figure entitled "Extreme Terms by Dimenions, 112th Senate"

#load("./wordsDims.RData")

pdf(file="Fig6_SFA.pdf",
    width=10, height=8)

step_size <- 3
distance <- 30
n_dims <- 6
n_top <- 10 
n_bottom <- 10 
n_empty <- 3
n_words <- n_top+n_bottom+n_empty
ytop <- (n_top+n_bottom+n_empty)*step_size
ylim <- ytop+10




par(cex.axis=1)
plot(rep(1,n_words), seq(from=ytop, to=1, by=-step_size),
     type="n",
     xlim=c(0,distance*n_dims+distance), ylim=c(0,ylim),
     xaxt='n', yaxt='n',
     xlab="", ylab="",
     bty='n')

for(i in 1:n_dims){
    # flipping dimension 3 and 5 for presentation
    if(i==3){
        idx <- 5
    } else if(i==5){
        idx <- 3
    } else {
        idx <- i
    }

    idx_last10 <- c(dim(list.out$U)[1]:(dim(list.out$U)[1]-9))
    
    yloc.i <- seq(from=ytop, to=1, by=-step_size)
    top.cex <- abs(as.numeric(sort(list.out$U[,idx])[idx_last10]))
    bottom.cex <- abs(as.numeric(sort(list.out$U[,idx])[n_top:1]))
    if(i==1){
        bottom.cex <- abs(as.numeric(sort(list.out$U[,idx])[idx_last10]))
        top.cex <- abs(as.numeric(sort(list.out$U[,idx])[1:n_top]))
    }

    size.cex <- c(top.cex,rep(5,n_empty),bottom.cex)
    ## normalize
    size.cex_norm <- (sqrt(size.cex)/max(sqrt(size.cex)))*1.4
  ##Adjust some sizes for presentation
 if(i==3)     size.cex_norm <- (sqrt(size.cex)/max(sqrt(size.cex)))*1.4
 if(i==5)     size.cex_norm <- (sqrt(size.cex)/max(sqrt(size.cex)))*1.4

   
    bottom.words <- names(sort(list.out$U[,idx])[n_top:1])
    top.words <- names(sort(list.out$U[,idx])[idx_last10])
    if(i==1){
        idx_last10 <- c((dim(list.out$U)[1]-9):(dim(list.out$U)[1]))
        top.words <- names(sort(list.out$U[,idx])[1:n_top])
        bottom.words <- names(sort(list.out$U[,idx])[idx_last10])
    }
    empty.words <- rep(".",n_empty)

	if(i %in% c(3,6))		size.cex_norm[-c(11:14)]<-size.cex_norm[-c(11:14)]*3.7
	if(i==6) yloc.i<-rev(yloc.i)
	
	    text(rep(distance*i,n_words), yloc.i,
         c(top.words,empty.words,bottom.words),
         cex=size.cex_norm)
    
    text(distance*i+1,ylim-4, paste("Dimension ", i, sep=""),
         cex=1.1) # distance to 160
    rect(distance*i-12,ylim-7,distance*i+12,ylim-6,
         col = rgb(0.5,0.5,0.5,1/4), border=NA)#xleft,ybottom, xtop, ytop

}

axis(c("Words with Negative Level", "Words with Positive Level"),
       side=2,
       at=c(16,ylim/2+16), tick=FALSE, line=-4)

arrows(10,1,10,ylim/2-8, code=1,
       length=.1)

arrows(10,ylim/2+1,10,ylim-8, code=2,
       length=.1)


dev.off()